SETUP

library(bigrquery)
library(DBI)
library(dplyr)
library(ggplot2)
library(tidyverse)
library(lubridate)
library(plotly)

con <- dbConnect(
  bigrquery::bigquery(),
  project = "huhl-course",
  dataset = "SWEDBANK",
  billing = "huhl-course"
)
dbListTables(con)
## [1] "agreement_valuta"    "default_types"       "defaults"           
## [4] "defaultsWithSEK"     "defaults_compressed" "exchangerates"
# Colors
GRAY1 = "#231e20"
GRAY2 = "#414040"
GRAY3 = "#555655"
GRAY4 = "#646369"
GRAY5 = "#76787B"
GRAY6 = "#828282"
GRAY7 = "#929497"
GRAY8 = "#a6a6a5"
GRAY9 = "#bfbebe"
RED1 = "#c3514e"
RED2 = "#e6bab7"
RED3 = "#800d00"
GREEN1 = "#0c8040"
GREEN2 = "#9abb59"
GREEN3 = "#31859c"
GREEN4 = "#4bacc5"
GREEN5 = "#93cddd"
ORANGE1 = "#f79747"
ORANGE2 = "#fac090"

theme_swd <- function() {
  theme_minimal(base_size = 11, base_family = "Helvetica") +
    theme(
      panel.grid.major = element_line(size = 0.1, color = GRAY9),
      panel.grid.minor = element_blank(),
      axis.line = element_line(size = .13, color = GRAY8),
      axis.text = element_text(color = GRAY7),
      axis.ticks.x = element_line(size = 1, color = GRAY8),
      axis.ticks.y = element_line(size = 1, color = GRAY8),
      axis.title = element_text(color = GRAY3),
      axis.title.y = element_text(hjust = 1, margin = margin(0, 6, 0, 15, "pt")),
      axis.title.x = element_text(hjust = 0, margin = margin(6, 0, 15, 0, "pt")),
      plot.subtitle = element_text(color = GRAY4, size= 11),
      plot.title = element_text(color = GRAY4, size= 15),
      plot.title.position = "plot", # This aligns the plot title to the very left edge
      plot.caption = element_text(hjust = 0, color = GRAY6),
      plot.caption.position = "plot",
      plot.margin = margin(.5,.5,.5,.5,"cm"),
      strip.text = element_text(color = GRAY7)) 
}
# For analysis

sql <- "
SELECT AgreementGenId,
       DefaultDate,
       DefaultEndDate,
       DefaultRankNum,
       DefaultTypeCd,
       ValutaKod,
       LossAmount,
       EAD,
       CASE WHEN Valuation IS NULL THEN LossAmount
                     ELSE LossAmount*Valuation END AS LossInSEK,
       CASE WHEN Valuation IS NULL THEN EAD
                     ELSE EAD*Valuation END AS EADInSEK
FROM `huhl-course.SWEDBANK.defaults` AS def
  LEFT JOIN `huhl-course.SWEDBANK.exchangerates` AS exch 
  ON def.DefaultDate = exch.ReportDate AND LOWER(def.ValutaKod) = LOWER(exch.CurrencyCd)
"

defaults <- dbGetQuery(con, sql)

SOLUTIONS

1. How many distinct defaults are in the dataset defaults?

sql <- "
SELECT COUNT(DISTINCT AgreementGenId) 
FROM `huhl-course.SWEDBANK.defaults`
"
dbGetQuery(con, sql)
## # A tibble: 1 × 1
##     f0_
##   <int>
## 1   623

2. Make a distribution graph of how many times agreements default.

sql <- "
SELECT COUNT(*) AS n 
FROM `huhl-course.SWEDBANK.defaults` GROUP BY AgreementGenId
"
to_visualize <- dbGetQuery(con, sql)

to_visualize %>% 
  ggplot(aes(x=n, fill=ORANGE1)) +
  geom_bar() +
  theme_swd()+
  scale_fill_identity() +
  xlab("How many defaults one agreement had") +
  ylab("Agreements count")

3. Due to regulations, defaults that have less or equal to 9 months between the end date and new defaults start date must be merged into one with the first defaults start date and the latest end date. Perform this 9 month aggregation on default data with the following rules for other variables:

  • The EAD and loss amount come from the first default.
  • The DefaultTypeCd must be the riskiest one. The riskiest default is the one with the lowest ranking number.
sql <- "
WITH tbl1 AS (
  SELECT AgreementGenId,
         DefaultDate,
         DefaultEndDate,
         DefaultRankNum,
         ValutaKod,
         LossAmount,
         EAD,
         LAG(DefaultEndDate,1, '0001-01-01') OVER (ORDER BY AgreementGenId) AS lag_day
  FROM huhl-course.SWEDBANK.defaults
), tbl2 AS (
  SELECT *, 
         CASE WHEN DATE_DIFF(lag_day, DefaultDate, MONTH) < 9 THEN 1 ELSE -0 END AS gflag
  FROM tbl1 
),tbl3 AS (
  SELECT *,
         SUM(CASE WHEN gflag=0 THEN 1 ELSE 0 END) OVER(ORDER BY AgreementGenId) AS gid
  FROM tbl2
)
SELECT MIN(tbl3.AgreementGenId) AS AgreementGenId,
       MIN(DefaultDate) AS DefaultDate,
       MAX(DefaultEndDate) AS DefaultEndDate,
       MIN(tbl3.DefaultRankNum) AS DefaultRankNum,
       ANY_VALUE(default_type.DefaultTypeCd) AS DefaultTypeCd,
       ANY_VALUE(tbl3.ValutaKod) AS ValutaKod,
       MIN(LossAmount) AS LossAmount,
       MIN(EAD) AS EAD
FROM tbl3 
  LEFT JOIN `huhl-course.SWEDBANK.default_types` AS default_type
  ON tbl3.DefaultRankNum = default_type.DefaultRankNum
GROUP BY gid
"

defaults_compressed <- dbGetQuery(con, sql)

4. What is the new number of unique defaults?

sql <- "
SELECT COUNT(DISTINCT AgreementGenId) 
FROM `huhl-course.SWEDBANK.defaults_compressed`
"

dbGetQuery(con, sql)
## # A tibble: 1 × 1
##     f0_
##   <int>
## 1   461

5. Analyse EAD-s and loss amounts.

Histograms

Loss Amount in SEK

Loss Amount by currency

EAD in SEK

EAD by currency

Loss and EAD in time

Loss amount in SEK

EAD in SEK

Means and medians for all time

All in SEK
LOSS

EAD

SEK
LOSS

EAD

EUR
LOSS

EAD

NOK
LOSS

EAD

USD
LOSS

EAD

Loss and EAD by default type

Loss Amount

EAD

6. Make a new binary variable that shows if the default is open now. How many defaults are closed and how many are open?

sql <- "
WITH tbl1 AS (SELECT *,
       CASE WHEN DefaultEndDate > CURRENT_DATE() THEN 0
        ELSE 1 END AS closed 
FROM `huhl-course.SWEDBANK.defaults`)
SELECT any_value(closed) AS IsClosed,
       COUNT(*) AS total
FROM tbl1 GROUP BY closed
"

dbGetQuery(con, sql) 
## # A tibble: 2 × 2
##   IsClosed total
##      <int> <int>
## 1        1   863
## 2        0    85

7. Add 1.6% of EAD to each loss amount. Find the sum of new loss amount and old. Also calculate the loss percentages on total level and on default type level with the new loss amount.

sql <- "
WITH tbl1 AS (
  SELECT
    LossInSEK,
    LossInSEK+0.016*EADInSEK AS NewLossInSEK,
    EADInSEK,
    DefaultTypeCd
  FROM `huhl-course.SWEDBANK.defaultsWithSEK`
)
SELECT SUM(LossInSEK) AS SumLossInSEK,
       SUM(NewLossInSEK) AS NewSumLossInSEK
FROM tbl1
"

dbGetQuery(con, sql) 
## # A tibble: 1 × 2
##   SumLossInSEK NewSumLossInSEK
##          <dbl>           <dbl>
## 1     8205175.        8437386.
sql <- "
WITH tbl1 AS (
  SELECT
    LossInSEK,
    LossInSEK+0.016*EADInSEK AS NewLossInSEK,
    EADInSEK,
    DefaultTypeCd
  FROM `huhl-course.SWEDBANK.defaultsWithSEK`
)
SELECT DefaultTypeCd,
       SUM(NewLossInSEK)/SUM(EADInSEK) AS Percentage
FROM tbl1 GROUP BY DefaultTypeCd
UNION ALL
SELECT 'ALL' AS DefaultTypeCd,
       SUM(NewLossInSEK)/SUM(EADInSEK) AS Percentage
FROM tbl1
"

dbGetQuery(con, sql) 
## # A tibble: 7 × 2
##   DefaultTypeCd Percentage
##   <chr>              <dbl>
## 1 KK                 0.698
## 2 SS                 0.304
## 3 RFF99              0.016
## 4 HAF                0.964
## 5 D90                0.743
## 6 CON                0.434
## 7 ALL                0.581

8. Analyse the data and describe the relevant/important characteristics.

Defaults types analysis

Defaults count by type

defaults count change in time by type